This note demonstrates the basics of spatial data analysis with R.
library(tidyverse)
library(ggthemes)
# Make ggthemes' theme_map() the default theme for every plot below.
theme_set(new = theme_map())

# Read the event data from disk and list its column names.
d = read_csv(file = "Lec_09/data/GEDEvent_v22_1.csv")
names(d)
## [1] "id" "relid" "year"
## [4] "active_year" "code_status" "type_of_violence"
## [7] "conflict_dset_id" "conflict_new_id" "conflict_name"
## [10] "dyad_dset_id" "dyad_new_id" "dyad_name"
## [13] "side_a_dset_id" "side_a_new_id" "side_a"
## [16] "side_b_dset_id" "side_b_new_id" "side_b"
## [19] "number_of_sources" "source_article" "source_office"
## [22] "source_date" "source_headline" "source_original"
## [25] "where_prec" "where_coordinates" "where_description"
## [28] "adm_1" "adm_2" "latitude"
## [31] "longitude" "geom_wkt" "priogrid_gid"
## [34] "country" "country_id" "region"
## [37] "event_clarity" "date_prec" "date_start"
## [40] "date_end" "deaths_a" "deaths_b"
## [43] "deaths_civilians" "deaths_unknown" "best"
## [46] "high" "low" "gwnoa"
## [49] "gwnob"
Draw a world map featuring the variation of conflicts among countries in the year of 2021.
Create a dataframe named d_country_2021 that summarizes
the number of conflicts in each country in the year of 2021.
# Count the rows of `d` recorded for 2021 within each country.
# `country_id` and `region` are grouping keys too, so they are carried into
# the result alongside `country`.
# NOTE(review): each row of `d` looks like a single event rather than a whole
# conflict, so `n_conflict` is presumably an event count -- confirm.
d_country_2021 = d |>
  filter(year == 2021) |>
  group_by(country, country_id, region) |>
  # `.groups = "drop"` returns an ungrouped tibble; without it summarise()
  # keeps the result grouped by country and country_id and prints a message.
  summarise(n_conflict = n(), .groups = "drop") |>
  # Countries with the most rows come first.
  arrange(desc(n_conflict))
# Fetch the "world" map as a data frame that ggplot2 can draw,
# then list its columns.
world = map_data(map = "world")
names(world)
## [1] "long" "lat" "group" "order" "region" "subregion"
# Summary statistics of every column (check the ranges of long and lat).
summary(object = world)
## long lat group order
## Min. :-180.00 Min. :-85.192 Min. : 1.0 Min. : 1
## 1st Qu.: -67.86 1st Qu.: 1.795 1st Qu.: 381.0 1st Qu.: 25215
## Median : 16.59 Median : 32.467 Median : 823.0 Median : 50492
## Mean : 10.52 Mean : 24.969 Mean : 801.3 Mean : 50470
## 3rd Qu.: 78.95 3rd Qu.: 54.029 3rd Qu.:1243.0 3rd Qu.: 75746
## Max. : 190.27 Max. : 83.600 Max. :1627.0 Max. :100964
## region subregion
## Length:99338 Length:99338
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Problem: the maximum longitude exceeds 180, so keep only the points
# whose longitude is at most 180.
world = filter(world, long <= 180)
# Plot the world map: gray countries with thin white borders.
# `linewidth` sets the border width; using `size` for lines was deprecated in
# ggplot2 3.4.0 and raised a deprecation warning here.
world |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region),
    color = "white", fill = "gray", linewidth = 0.1
  ) +
  coord_map()
# Attach the 2021 counts to the map data, matching the map's `region` to the
# summary's `country`. Map rows without a match get NA for `n_conflict`.
# NOTE(review): the match is on exact country names; spellings may differ
# between the two sources -- check with anti_join() and recode if needed.
world_m = left_join(
  x = world,
  y = d_country_2021,
  by = c("region" = "country")
)
Check what the merged data look like.
Color the countries with the number of conflicts in 2021.
# Choropleth: fill each country by its 2021 count, using the default
# continuous fill scale and thin black borders.
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = n_conflict),
    color = "black", linewidth = 0.1
  ) +
  coord_map()
Our next step is to make the above map more informative.
Try the scale_fill_viridis_c() function. This should
give you the most reliable color scheme.
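| Argument | Description |
|---|---|
| option | A character string indicating the color map option to use. Eight options are available: "magma" (or "A"), "inferno" (or "B"), "plasma" (or "C"), "viridis" (or "D"), "cividis" (or "E"), "rocket" (or "F"), "mako" (or "G"), "turbo" (or "H"). |
| direction | Sets the order of colors in the scale. If 1, the default, colors are ordered from darkest to lightest. If -1, the order of colors is reversed. |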
# Same choropleth with viridis option "A" and white borders.
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = n_conflict),
    color = "white", linewidth = 0.1
  ) +
  scale_fill_viridis_c(option = "A") +
  coord_map()
# Same choropleth with viridis option "B".
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = n_conflict),
    color = "white", linewidth = 0.1
  ) +
  scale_fill_viridis_c(option = "B") +
  coord_map()
# Viridis option "B" with the color order reversed (direction = -1).
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = n_conflict),
    color = "white", linewidth = 0.1
  ) +
  scale_fill_viridis_c(option = "B", direction = -1) +
  coord_map()
# Viridis option "D" with the color order reversed (direction = -1).
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = n_conflict),
    color = "white", linewidth = 0.1
  ) +
  scale_fill_viridis_c(option = "D", direction = -1) +
  coord_map()
Try the other specifications of color schemes.
The most robust way to change the legend’s name is to rename the variable.
Note: When a variable name contains a space, you must wrap it in backticks (`) whenever you refer to it.
# Rename the fill variable so the legend title reads "Number of Conflicts".
# The new name contains spaces, so it is wrapped in backticks inside aes().
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  rename("Number of Conflicts" = "n_conflict") |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = `Number of Conflicts`),
    color = "white", linewidth = 0.1
  ) +
  scale_fill_viridis_c(option = "B", direction = -1) +
  coord_map()
The distribution of conflict frequencies is so skewed that differences among low-conflict countries become indistinguishable. Transform the data or the scale to account for this skew.
# Handle the skew by transforming the scale: trans = "log" log-transforms
# the fill scale while the data themselves stay untouched.
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  rename("Number of Conflicts" = "n_conflict") |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = `Number of Conflicts`),
    color = "white", linewidth = 0.1
  ) +
  scale_fill_viridis_c(option = "B", direction = -1, trans = "log") +
  coord_map()
Alternatively, we can transform the data:
# Handle the skew by transforming the data instead: fill by log(count + 1)
# and leave the fill scale linear. The legend is moved below the map.
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
world_m |>
  rename("Number of Conflicts" = "n_conflict") |>
  ggplot(aes(x = long, y = lat)) +
  geom_map(
    map = world,
    aes(map_id = region, fill = log(`Number of Conflicts` + 1)),
    color = "white", linewidth = 0.1
  ) +
  scale_fill_viridis_c(option = "B", direction = -1) +
  coord_map() +
  theme(legend.position = "bottom")
The maps we have drawn in this notebook are named “choropleth maps.”
“A choropleth map displays divided geographical areas or regions that are colored in relation to a numeric variable.”